In [ ]:
## Install libraries
!pip install xarray pystac_client geopandas dask "dask[dataframe]" planetary-computer odc-stac odc-geo mapclassify spyndex zarr
In [1]:
import xarray as xr
import pystac_client
import odc.stac
from odc.geo.geobox import GeoBox
from dask.diagnostics import ProgressBar
from dask.distributed import Client, LocalCluster
import geopandas as gpd
import planetary_computer as pc
import odc.geo.xr
import spyndex
import os
from xrspatial import hillshade, slope, aspect
In [2]:
## (Optional) GDAL config
env = dict(GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR',
AWS_NO_SIGN_REQUEST='YES',
GDAL_MAX_RAW_BLOCK_CACHE_SIZE='200000000',
GDAL_SWATH_SIZE='200000000',
VSI_CURL_CACHE_SIZE='200000000')
os.environ.update(env)
In [3]:
## Start Dask Client
client = Client(processes=True, n_workers=6)
client
Out[3]:
Client
Client-1855b170-d293-11ef-ab3b-424bb805562b
| Connection method: Cluster object | Cluster type: distributed.LocalCluster |
| Dashboard: http://127.0.0.1:8787/status |
Cluster Info
LocalCluster
79af5e38
| Dashboard: http://127.0.0.1:8787/status | Workers: 6 |
| Total threads: 12 | Total memory: 36.00 GiB |
| Status: running | Using processes: True |
Scheduler Info
Scheduler
Scheduler-07008db2-dc2f-4b80-b163-572c7365c6c1
| Comm: tcp://127.0.0.1:55292 | Workers: 6 |
| Dashboard: http://127.0.0.1:8787/status | Total threads: 12 |
| Started: Just now | Total memory: 36.00 GiB |
Workers
Worker: 0
| Comm: tcp://127.0.0.1:55313 | Total threads: 2 |
| Dashboard: http://127.0.0.1:55315/status | Memory: 6.00 GiB |
| Nanny: tcp://127.0.0.1:55295 | |
| Local directory: /var/folders/7x/v47p0s4s3bg3ytckj1t34h180000gn/T/dask-scratch-space/worker-rctg3f6o | |
Worker: 1
| Comm: tcp://127.0.0.1:55314 | Total threads: 2 |
| Dashboard: http://127.0.0.1:55318/status | Memory: 6.00 GiB |
| Nanny: tcp://127.0.0.1:55296 | |
| Local directory: /var/folders/7x/v47p0s4s3bg3ytckj1t34h180000gn/T/dask-scratch-space/worker-5oqzsvnd | |
Worker: 2
| Comm: tcp://127.0.0.1:55320 | Total threads: 2 |
| Dashboard: http://127.0.0.1:55323/status | Memory: 6.00 GiB |
| Nanny: tcp://127.0.0.1:55297 | |
| Local directory: /var/folders/7x/v47p0s4s3bg3ytckj1t34h180000gn/T/dask-scratch-space/worker-gzut5cgm | |
Worker: 3
| Comm: tcp://127.0.0.1:55307 | Total threads: 2 |
| Dashboard: http://127.0.0.1:55309/status | Memory: 6.00 GiB |
| Nanny: tcp://127.0.0.1:55298 | |
| Local directory: /var/folders/7x/v47p0s4s3bg3ytckj1t34h180000gn/T/dask-scratch-space/worker-n69vpbyy | |
Worker: 4
| Comm: tcp://127.0.0.1:55308 | Total threads: 2 |
| Dashboard: http://127.0.0.1:55311/status | Memory: 6.00 GiB |
| Nanny: tcp://127.0.0.1:55299 | |
| Local directory: /var/folders/7x/v47p0s4s3bg3ytckj1t34h180000gn/T/dask-scratch-space/worker-5sek23vi | |
Worker: 5
| Comm: tcp://127.0.0.1:55317 | Total threads: 2 |
| Dashboard: http://127.0.0.1:55321/status | Memory: 6.00 GiB |
| Nanny: tcp://127.0.0.1:55300 | |
| Local directory: /var/folders/7x/v47p0s4s3bg3ytckj1t34h180000gn/T/dask-scratch-space/worker-djrt1ww7 | |
In [4]:
## Import boundary vector data
sa_bounds = gpd.read_file("./data/sa_boundary.gpkg")
sa_bounds.explore()
Out[4]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [5]:
## Create GeoBox
dx = 1000
bbox_hart = sa_bounds.to_crs("EPSG:9221")
geobox = GeoBox.from_bbox(bbox_hart.total_bounds, crs="EPSG:9221", resolution=dx)
geobox
Out[5]:
GeoBox
Dimensions
1,598x1,419
EPSG
9221
Resolution
1000m
Cell
200px
WKT
PROJCRS["Hartebeesthoek94 / ZAF BSU Albers 25E",
BASEGEOGCRS["Hartebeesthoek94",
DATUM["Hartebeesthoek94",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4148]],
CONVERSION["South Africa Basic Survey Unit Albers 25E",
METHOD["Albers Equal Area",
ID["EPSG",9822]],
PARAMETER["Latitude of false origin",-30,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8821]],
PARAMETER["Longitude of false origin",25,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8822]],
PARAMETER["Latitude of 1st standard parallel",-22,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8823]],
PARAMETER["Latitude of 2nd standard parallel",-38,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8824]],
PARAMETER["Easting at false origin",1400000,
LENGTHUNIT["metre",1],
ID["EPSG",8826]],
PARAMETER["Northing at false origin",1300000,
LENGTHUNIT["metre",1],
ID["EPSG",8827]]],
CS[Cartesian,2],
AXIS["northing (N)",north,
ORDER[1],
LENGTHUNIT["metre",1]],
AXIS["easting (E)",east,
ORDER[2],
LENGTHUNIT["metre",1]],
USAGE[
SCOPE["Spatial analysis for the purposes of Natural Capital Accounting."],
AREA["South Africa - mainland - onshore and offshore."],
BBOX[-38.17,13.33,-22.13,36.54]],
ID["EPSG",9221]]
BASEGEOGCRS["Hartebeesthoek94",
DATUM["Hartebeesthoek94",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4148]],
CONVERSION["South Africa Basic Survey Unit Albers 25E",
METHOD["Albers Equal Area",
ID["EPSG",9822]],
PARAMETER["Latitude of false origin",-30,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8821]],
PARAMETER["Longitude of false origin",25,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8822]],
PARAMETER["Latitude of 1st standard parallel",-22,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8823]],
PARAMETER["Latitude of 2nd standard parallel",-38,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8824]],
PARAMETER["Easting at false origin",1400000,
LENGTHUNIT["metre",1],
ID["EPSG",8826]],
PARAMETER["Northing at false origin",1300000,
LENGTHUNIT["metre",1],
ID["EPSG",8827]]],
CS[Cartesian,2],
AXIS["northing (N)",north,
ORDER[1],
LENGTHUNIT["metre",1]],
AXIS["easting (E)",east,
ORDER[2],
LENGTHUNIT["metre",1]],
USAGE[
SCOPE["Spatial analysis for the purposes of Natural Capital Accounting."],
AREA["South Africa - mainland - onshore and offshore."],
BBOX[-38.17,13.33,-22.13,36.54]],
ID["EPSG",9221]]
In [6]:
## Get Sentinel-2 data
#Set parameters
stac_client = 'https://planetarycomputer.microsoft.com/api/stac/v1'
bbox = list(sa_bounds.total_bounds)
time = "2024-03-01/2024-03-31"
# Search data
items = pystac_client.Client.open(
stac_client
).search(
bbox=bbox,
collections=['sentinel-2-l2a'],
datetime=time
).item_collection()
#Load Data
ds_odc = odc.stac.load(
items,
bands=["red", "green", "blue", "nir", "SCL"],
chunks={'time': 1, 'x': 100, 'y': 100},
geobox=geobox,
resampling="bilinear",
patch_url=pc.sign
)
In [7]:
## Create Median Composite
def is_valid_pixel(data):
return ((data > 3) & (data < 7)) | (data==11)
ds_odc['valid'] = is_valid_pixel(ds_odc.SCL)
ds_median = ds_odc.where(ds_odc.valid).median(dim="time")
In [8]:
## Get Digital Elevation Model
# Search data
items = pystac_client.Client.open(
stac_client
).search(
bbox=bbox,
collections=["nasadem"],
).item_collection()
# Load data
dem_odc = odc.stac.load(
items,
bands=["elevation"],
chunks={'time': 1, 'x': 100, 'y': 100},
geobox=geobox,
resampling="bilinear",
patch_url=pc.sign
)
In [ ]:
## Combine datasets
ds_median["elev"] = dem_odc.isel(time = 0).elevation
In [9]:
## Execute
data = ds_median.compute()
/Users/stevenhill/micromamba/envs/stac/lib/python3.10/site-packages/distributed/client.py:3371: UserWarning: Sending large graph of size 77.41 MiB. This may cause some slowdown. Consider loading the data with Dask directly or using futures or delayed objects to embed the data into the graph without repetition. See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information. warnings.warn( /Users/stevenhill/micromamba/envs/stac/lib/python3.10/site-packages/rasterio/warp.py:387: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. dest = _reproject( /Users/stevenhill/micromamba/envs/stac/lib/python3.10/site-packages/rasterio/warp.py:387: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. dest = _reproject( /Users/stevenhill/micromamba/envs/stac/lib/python3.10/site-packages/rasterio/warp.py:387: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. dest = _reproject( /Users/stevenhill/micromamba/envs/stac/lib/python3.10/site-packages/rasterio/warp.py:387: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. dest = _reproject(
In [20]:
## Plot RGB data
data.odc.explore(bands=["red","green","blue"],vmax=4000, robust=True)
/Users/stevenhill/micromamba/envs/stac/lib/python3.10/site-packages/odc/geo/_rgba.py:56: RuntimeWarning: invalid value encountered in cast
return x.astype("uint8")
Out[20]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [22]:
## Compute different spectral indices
data["NDVI"] = spyndex.computeIndex(
index=["NDVI"],
params={
"N": data.nir,
"R": data.red
}
)
data["EVI"] = spyndex.computeIndex(
index=["EVI"],
params={
"C1": spyndex.constants["C1"].value,
"C2": spyndex.constants["C2"].value,
"g": spyndex.constants["g"].value,
"L": spyndex.constants["L"].value,
"N": data.nir,
"R": data.red,
"B": data.blue,
},
)
data["GNDVI"] = spyndex.computeIndex(
index=["GNDVI"],
params={
"N": data.nir,
"G": data.green
}
)
In [23]:
## Compute topographic indices
data["hillshade"] = hillshade(data["elev"])
data["aspect"] = aspect(data["elev"])
data["slope"] = slope(data["elev"])
In [24]:
data = data.compute()
In [25]:
## Plot NDVI
data.NDVI.odc.explore()
Out[25]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [26]:
## Plot Slope
data.slope.odc.explore()
Out[26]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [27]:
## Save dataset
data.to_zarr("./data/dataset.zarr", write_empty_chunks=False)
Out[27]:
<xarray.backends.zarr.ZarrStore at 0x3b7ef7b50>
In [28]:
## Import dataset
data = xr.open_zarr("./data/dataset.zarr")
In [29]:
## Create random sample points
sample_points = sa_bounds.sample_points(1000).to_crs("EPSG:9221")
coords = sample_points.get_coordinates()
In [30]:
## Extract all variables
x = coords.x.to_list()
y = coords.y.to_list()
values_at_pts = data.sel(x=x, y=y, method="nearest")
In [31]:
values_at_pts
Out[31]:
<xarray.Dataset> Size: 56MB
Dimensions: (y: 1000, x: 1000)
Coordinates:
time datetime64[ns] 8B ...
* x (x) float64 8kB 5.875e+05 6.165e+05 ... 2.138e+06 2.162e+06
* y (y) float64 8kB 1.418e+06 1.39e+06 ... 1.536e+06 1.592e+06
Data variables: (12/14)
EVI (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
GNDVI (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
NDVI (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
SCL (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
aspect (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
blue (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
... ...
hillshade (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
nir (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
red (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
slope (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
spatial_ref int32 4B ...
valid (y, x) float64 8MB dask.array<chunksize=(177, 399), meta=np.ndarray>